# -*- coding:utf8 -*-
# !/usr/bin/env python


import re
import urllib2
from bs4 import BeautifulSoup
from utils import kill_captcha
from scpy.logger import get_logger
from get_page import *
import requests
import json
import copy
import traceback
import table
import datetime
from scpy.xawesome_time import parse_time
import sd_template_dict as TE
import sd_format as FO
import js_trans_dict as TR
import time

logger = get_logger(__file__)

ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36"


def download_captcha_kill(companyName):
    """
    Solve the Jiangsu (js) registry captcha and run the company search.

    The index page and the captcha image are fetched on one session, the
    image is handed to ``kill_captcha``, and the recognised text is posted
    together with ``companyName`` to the query endpoint.

    :param companyName: company name or registration number to search for
    :return: the raw body of the search response on success;
             '' when the caller should retry (captcha unreadable or
             rejected, or the server reports the IP as rate limited);
             None when the server reports no matching company
    :raises Exception: when the index page sets no cookie, or when the
             captcha service or an HTTP request raises
    """
    index_url = "http://www.jsgsj.gov.cn:58888/province/"
    img_url = 'http://www.jsgsj.gov.cn:58888/province/rand_img.jsp?type=8'
    com_list_url = 'http://www.jsgsj.gov.cn:58888/province/infoQueryServlet.json?queryCinfo=true'

    req = requests.session()
    req.headers = {'User-Agent': ua}
    try:
        index_res = req.get(index_url)
        img_response = req.get(img_url)

        # The search only works with the session cookie handed out by the
        # index page, so fail early when it is missing or malformed.
        cookie = index_res.cookies.items()
        if not cookie or not cookie[0] or len(cookie[0]) != 2:
            raise Exception("cookie 获取失败")
        img_res = img_response.content

        # Captcha recognition
        try:
            captcha = kill_captcha(img_res, 'js', 'png')
        except Exception as e:
            logger.error("破解验证码的服务，出现异常")
            logger.error(e)
            # bare raise keeps the original traceback
            raise
        if not captcha or len(captcha) > 100 or str(captcha) in ['None', 'wrong']:
            logger.error('验证码为:%s' % captcha)
            logger.error("破解验证码的服务，出现异常,可能是下载的验证码错误，也可能破解服务出现异常")
            # empty string tells the caller to try again
            return ''
        captcha = re.sub(r'\W', '', captcha)
        if len(captcha) != 6:
            # Jiangsu captchas are 6 characters; anything else is a misread
            return ''
        logger.info('验证码为:%s' % captcha)

        com_list_data = {"verifyCode": captcha, "name": companyName}
        com_list_res = req.post(url=com_list_url, data=com_list_data, timeout=8).content

        # Server messages for a rate-limited IP ("exceeded the query limit
        # within an hour / a day") both start with this marker.
        if '该IP' in com_list_res:
            time.sleep(10)
            logger.error("IP is barred!!!!!!!")
            return ''

        # Captcha rejected by the server: retry
        if '验证码填写错误' in com_list_res:
            return ''

        # No company matches the query
        if '没有符合查询条件的结果' in com_list_res:
            return None

        # Route the response dump through the logger instead of stdout
        logger.info(com_list_res)
        return com_list_res
    finally:
        # release pooled connections on every exit path
        req.close()


def _js_fetch_company_pages(req, org, com_id, seq_id, cst_time):
    """Post the company-level queries and return their raw bodies keyed by page name."""
    base_url = 'http://www.jsgsj.gov.cn:58888/ecipplatform/ciServlet.json?ciEnter=true'
    common_url = 'http://www.jsgsj.gov.cn:58888/ecipplatform/commonServlet.json?commonEnter=true'

    def corp_query(specific_query, paged=True):
        # ciServlet query addressed by the CORP_* identifiers
        data = {
            'CORP_ORG': org,
            'CORP_ID': com_id,
            'CORP_SEQ_ID': seq_id,
            'specificQuery': specific_query,
        }
        if paged:
            # pageSize 1000 pulls the whole list in a single request
            data.update({'showRecordLine': '1', 'pageNo': '1', 'pageSize': '1000'})
        return req.post(url=base_url, data=data).content

    def common_query(properties_name):
        # commonServlet query selected by propertiesName
        data = {
            'showRecordLine': '1',
            'specificQuery': 'commonQuery',
            'propertiesName': properties_name,
            'corp_org': org,
            'corp_id': com_id,
            'tmp': cst_time,
            'pageNo': '1',
            'pageSize': '1000',
        }
        return req.post(url=common_url, data=data).content

    pages = {}
    # Basic information
    pages['base'] = req.post(url=base_url, data={
        'org': org,
        'id': com_id,
        'seq_id': seq_id,
        'specificQuery': 'basicInfo',
    }).content
    # Shareholders. The per-shareholder detail pages
    # (ciServlet.json?ciDetail=true, specificQuery=investorInfor) are
    # deliberately not fetched: they cost one extra request per shareholder.
    pages['share'] = corp_query('investmentInfor')
    # Change records
    pages['alter'] = common_query('biangeng')
    # Key personnel
    pages['person'] = corp_query('personnelInformation')
    # Branches
    pages['branch'] = corp_query('branchOfficeInfor')
    # Liquidation (sent without paging fields)
    pages['liquidation'] = corp_query('qsfzr', paged=False)
    # Spot checks
    pages['check'] = common_query('checkup')
    # Abnormal operation records
    pages['abnormal'] = common_query('abnormalInfor')
    return pages


def _js_fetch_year_report(req, year_url, report_item, reg_no, cst_time):
    """Post the queries for one annual report and return its raw bodies keyed by section."""
    digits = re.findall(r'\d+', report_item.get('REPORT_RESULT', '') or '')
    year = digits[0] if digits else ''
    year_id = report_item.get('ID', '')

    def query(properties_name, fields, stamp_key='temp', paged=True):
        # The timestamp field is named 'temp' on most queries, 'tmp' on a few
        data = {
            'showRecordLine': '1',
            'specificQuery': 'gs_pb',
            'propertiesName': properties_name,
            stamp_key: cst_time,
        }
        if paged:
            data.update({'pageNo': '1', 'pageSize': '1000'})
        data.update(fields)
        return req.post(url=year_url, data=data).content

    report = {'year': year}
    # Basic information of the report
    report['base'] = query(
        'query_basicInfo',
        {'ID': year_id, 'OPERATE_TYPE': '2', 'showRecordLine': '0'},
        stamp_key='tmp', paged=False)
    # Websites
    report['web'] = query('query_websiteInfo', {'REPORT_YEAR': year, 'ID': year_id})
    # Guarantees given. REPORT_YEAR is this report's own year, not a fixed one.
    report['security'] = query('query_InformationSecurity', {'REPORT_YEAR': year, 'ID': year_id})
    # Equity transfers
    report['equity'] = query(
        'query_EquityTransfer',
        {'REPORT_YEAR': year, 'ID': year_id},
        stamp_key='tmp', paged=False)
    # Shareholders (founders) and their contributions
    report['share'] = query('query_stockInfo', {
        'MAIN_ID': year_id,
        'OPERATE_TYPE': '1',
        'TYPE': 'NZGS',
        'ID': year_id,
        'ADMIT_MAIN': '10',
    })
    # Revision records: selected by year and registration number only, so a
    # wrong year returns another report's records.
    report['change'] = query('query_RevisionRecord', {'REPORT_YEAR': year, 'REG_NO': reg_no})
    # Outbound investments
    report['invest'] = query('query_investInfo', {'REPORT_YEAR': year, 'ID': year_id})
    return report


def get_company_info(com_info):
    """
    Download the company's detail pages and annual reports.

    :param com_info: raw JSON text of the search result (a non-empty list
                     whose first entry is kept as the "index" record)
    :return: dict with "province" ("js"), "type", "keyword", "companyName",
             "json", plus
             "html": raw responses keyed by index / base / share / alter /
                     person / branch / liquidation / check / abnormal
                     (only "index" when the result carries no usable
                     queryInfor(...) link),
             "yearList": one dict per annual report with year / base / web /
                     security / equity / share / change / invest
    :raises Exception: when the decoded search result is empty
    """
    raw_dict = {
        "province": "js",
        "type": "1",
        "html": {},
        "yearList": [],
        "keyword": "",
        "companyName": "",
        "json": "",
    }
    com_info_list = json.loads(com_info)
    if not com_info_list:
        raise Exception("com_list 错误")

    # Attach the index record up front so that every return path keeps it;
    # it is the only data available for entries without a detail link.
    raw_html = {'index': com_info_list[0]}
    raw_dict['html'] = raw_html

    if 'queryInfor' not in com_info:
        return raw_dict

    matches = re.findall(
        r"queryInfor\('(.*?)','(.*?)','(.*?)','(.*?)','(.*?)','(.*?)','(.*?)'\)", com_info)
    if not matches or len(matches[0]) < 7:
        return raw_dict
    org, com_id, seq_id, reg_no = matches[0][1:5]

    cst_time = time.strftime("%a %b %d %Y %H:%M:%S") + ' GMT+0800 (CST)'
    year_url = 'http://www.jsgsj.gov.cn:58888/ecipplatform/nbServlet.json?nbEnter=true'

    req = requests.session()
    req.headers = {
        'User-Agent': ua,
        'Referer': 'http://www.jsgsj.gov.cn:58888/ecipplatform/outer_fiei/outer_fiei_queryCorpInfor_gsRelease.jsp',
    }
    try:
        raw_html.update(_js_fetch_company_pages(req, org, com_id, seq_id, cst_time))

        # List of annual reports
        raw_year = req.post(url=year_url, data={
            'REG_NO': reg_no,
            'showRecordLine': '0',
            'specificQuery': 'gs_pb',
            'propertiesName': 'query_report_list',
            'tmp': cst_time,
        }).content
        report_items = json.loads(raw_year) if raw_year else []

        raw_dict['yearList'] = [
            _js_fetch_year_report(req, year_url, a_item, reg_no, cst_time)
            for a_item in report_items
        ]
    finally:
        # release pooled connections even when a request fails
        req.close()

    return raw_dict


def extract_base_info(raw_dict):
    """
    Turn the raw pages gathered by get_company_info into the structured
    company record.

    :param raw_dict: dict whose "html" entry maps page names to raw JSON
                     text (and "index" to the decoded search-result entry)
    :return: a copy of TE.void_base_dict, filled in and passed through
             FO.clean_all
    :raises Exception: when raw_dict or its "html" entry is empty
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    raw_html = raw_dict.get("html", {})
    if not raw_html:
        raise Exception("raw_dict 错误")

    def load_items(page):
        # Decode one stored page and return its "items" list ([] when the
        # page is missing, empty, or carries a null list).
        raw = raw_html.get(page, "")
        return (json.loads(raw).get("items") or []) if raw else []

    def first_group(pattern, text):
        # First captured group of the first match, '' when nothing matches.
        found = re.findall(pattern, text)
        return found[0] if found else ''

    res_base_dict = copy.deepcopy(TE.void_base_dict)

    # Basic information
    raw_base = raw_html.get("base", "")
    raw_base_list = json.loads(raw_base) if raw_base else []
    if raw_base_list:
        res_base_dict["basicList"] = [FO.transform_dict(TE.basic_dict, TR.basic_dict, raw_base_list[0])]
    else:
        # Revoked / cancelled companies have no detail url; fall back to
        # the HTML snippet stored in the index record.
        raw_index = raw_html.get("index", {})
        raw_index_info = raw_index.get('INFO', "") or ""
        dt_s = re.findall(r'<dt>.*?</dd>', raw_index_info)
        if dt_s:
            snippet = dt_s[0]
            # Every pattern holding Chinese text is a unicode literal:
            # on Python 2 a byte-string pattern cannot match the unicode
            # text produced by json.loads.
            res_base_dict["basicList"] = [{
                'enterpriseName': first_group(r'<dt>(.*?)</dt>', snippet),  # enterprise name
                'frName': first_group(r'</span>.*?:<span>(.*?)</span>', snippet),  # legal representative
                'regNo': first_group(r'<dd>.*?:<span>(.*?)</span>', snippet),  # registration number
                'regCap': '',  # registered capital
                'regCapCur': '',  # currency of the registered capital
                'esDate': '',  # establishment date (YYYY-MM-DD)
                'openFrom': '',  # operating period from (YYYY-MM-DD)
                'openTo': '',  # operating period to (YYYY-MM-DD)
                'enterpriseType': '',  # enterprise (organisation) type
                'enterpriseStatus': '',  # operating status
                'cancelDate': first_group(u'注销日期:<span>(.*?)</span>', snippet),  # cancellation date
                'revokeDate': first_group(u'吊销日期:<span>(.*?)</span>', snippet),  # revocation date
                'address': '',  # registered address
                'abuItem': '',  # licensed business items
                'cbuItem': '',  # general business items
                'operateScope': '',  # business scope
                'operateScopeAndForm': '',  # business scope and form
                'regOrg': first_group(u'登记机关:<span>(.*?)</span>', snippet),  # registration authority
                'ancheYear': '',  # last annual inspection year
                'ancheDate': '',  # last annual inspection date
                'industryPhyCode': '',  # industry category code
                'industryPhyName': '',  # industry category name
                'industryCode': '',  # national economy industry code
                'industryName': '',  # national economy industry name
                'recCap': '',  # paid-in capital
                'oriRegNo': '',  # original registration number
                'auditDate': '',  # approval date
            }]
    res_base_dict["province"] = "js"

    # Shareholders
    res_base_dict["shareHolderList"] = [
        FO.transform_dict(TE.shareHolder_dict, TR.shareHolder_dict, a_share)
        for a_share in load_items("share")
    ]

    # Change records
    res_base_dict["alterList"] = [
        FO.transform_dict(TE.alter_dict, TR.alter_dict, a_alter)
        for a_alter in load_items("alter")
    ]

    # Key personnel: each response row carries up to two people
    # NOTE(review): 'name' is read from POSITION_NAME* and 'position' from
    # PERSON_NAME*, which looks swapped - confirm against a live response
    # before changing it.
    person_list = []
    for a_person in load_items("person"):
        for slot in ('1', '2'):
            a_person_temp = {
                'name': a_person.get('POSITION_NAME' + slot, '') or '',
                'position': a_person.get('PERSON_NAME' + slot, '') or '',
                'sex': '',
            }
            # skip the empty half of a row
            if any(a_person_temp.values()):
                person_list.append(a_person_temp)
    res_base_dict["personList"] = person_list

    # Branches (rows that transform to all-empty values are dropped)
    filiation_list = []
    for a_branch in load_items("branch"):
        temp_filiation = FO.transform_dict(TE.filiation_dict, TR.filiation_dict, a_branch)
        if any(temp_filiation.values()):
            filiation_list.append(temp_filiation)
    res_base_dict["filiationList"] = filiation_list

    # TODO: liquidation information is downloaded ("liquidation" page, e.g.
    # '[{"ACCOUNT_MAN":"","ACCOUNT_MEMBER":""}]') but not parsed yet.

    # Abnormal operation records
    res_base_dict["abnormalOperation"] = [
        FO.transform_dict(TE.abnormalOperation_dict, TR.abnormalOperation_dict, a_abnormal)
        for a_abnormal in load_items("abnormal")
    ]

    # Spot checks
    res_base_dict["checkMessage"] = [
        FO.transform_dict(TE.checkMessage_dict, TR.checkMessage_dict, a_check)
        for a_check in load_items("check")
    ]

    # Final clean-up pass
    res_base_dict = FO.clean_all(res_base_dict)

    return res_base_dict


def extract_year_info(raw_dict):
    """
    Turn the raw annual-report pages gathered by get_company_info into a
    list of structured annual reports.

    :param raw_dict: dict whose "yearList" entry holds one dict per report
                     with the raw JSON text of each section
    :return: list with one cleaned copy of TE.void_year_dict per report
    :raises Exception: when raw_dict is empty
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    def load_items(a_year, section):
        # Decode one stored section and return its "items" list ([] when
        # the section is missing, empty, or carries a null list).
        raw = a_year.get(section, "")
        return (json.loads(raw).get("items") or []) if raw else []

    res_year_list = []
    for a_year in raw_dict.get("yearList", []):
        res_year_dict = copy.deepcopy(TE.void_year_dict)
        res_year_dict["year"] = a_year.get("year", "")

        # Basic information (the response is a plain list of records)
        raw_year_base = a_year.get("base", "")
        raw_year_base_list = json.loads(raw_year_base) if raw_year_base else []
        for item in raw_year_base_list:
            res_year_dict["baseInfo"] = FO.transform_dict(TE.baseInfo_dict, TR.baseInfo_dict, item)

        # Website: only the first entry is kept
        raw_year_website_list = load_items(a_year, "web")
        res_year_dict["website"] = FO.transform_dict(
            TE.website_dict, TR.website_dict, raw_year_website_list[0]) if raw_year_website_list else {}

        # Asset figures live inside the basic-information records, so walk
        # the decoded records (not the raw JSON text).
        for item in raw_year_base_list:
            temp_assets = FO.transform_dict(TE.assetsInfo_dict, TR.assetsInfo_dict, item)
            if any(temp_assets.values()):
                res_year_dict["assetsInfo"] = temp_assets

        # Outbound investments
        res_year_dict["entinvItemList"] = [
            FO.transform_dict(TE.entinvItem_dict, TR.year_entinvItem_dict, item)
            for item in load_items(a_year, "invest")
        ]

        # Founders and their contributions
        res_year_dict["investorInformations"] = [
            FO.transform_dict(TE.investorInformations_dict, TR.investorInformations_dict, item)
            for item in load_items(a_year, "share")
        ]

        # TODO: guarantees given to others (the "security" section) are
        # downloaded but not parsed yet.

        # Equity changes
        res_year_dict["equityChangeInformations"] = [
            FO.transform_dict(TE.equityChangeInformations_dict, TR.equityChangeInformations_dict, item)
            for item in load_items(a_year, "equity")
        ]

        # Revision records
        res_year_dict["changeRecords"] = [
            FO.transform_dict(TE.changeRecords_dict, TR.changeRecords_dict, item)
            for item in load_items(a_year, "change")
        ]

        # Final clean-up pass
        res_year_list.append(FO.clean_all(res_year_dict))

    return res_year_list


def search2(companyName, MAXTIME=40):
    """
    Search the Jiangsu (js) registry for a company and parse the result.

    The captcha step is retried until it yields a search result, the site
    reports that the company does not exist, or MAXTIME attempts are used.

    :param companyName: company name or registration number
    :param MAXTIME: maximum number of captcha attempts
    :return: None when the company does not exist; otherwise a tuple
             (raw_dict, asic_dict, gate_method) where asic_dict is None if
             parsing the downloaded pages failed
    :raises Exception: when every captcha attempt fails, or when the
             captcha/search step itself raises
    """
    res = ''
    a_time = MAXTIME
    # '' means "captcha failed, try again"; anything else ends the loop
    while res == '' and a_time > 0:
        if a_time < MAXTIME:
            logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, a_time))
        a_time -= 1
        try:
            res = download_captcha_kill(companyName)
        except Exception:
            traceback.print_exc()
            # bare raise keeps the original traceback
            raise

    if res is None:
        # company does not exist
        return None
    if res == '':
        # Retries exhausted: stop here instead of feeding an empty string
        # to the JSON parser further down.
        raise Exception("多次破解验证码错误,当前设置次数为：%s" % MAXTIME)

    com_list = res
    raw_dict = get_company_info(com_list)

    company_name = companyName
    asic_dict = None
    try:
        parsed = extract_base_info(raw_dict)
        parsed['yearReportList'] = extract_year_info(raw_dict)
        # prefer the name reported by the registry over the search keyword
        company_name = parsed['basicList'][0].get('enterpriseName', '') or companyName
        asic_dict = parsed
    except Exception as e:
        # parsing is best effort: the raw pages are still returned
        logger.info(e)

    raw_dict['companyName'] = company_name
    gate_method = {
        'url': 'http://www.jsgsj.gov.cn:58888/province/',
        'method': 'post',
        'province': 'js',
        'companyName': company_name,
        'data': com_list,
    }
    return raw_dict, asic_dict, gate_method


def search(companyName):
    """
    Look a company up and return only its parsed record.

    :param companyName: company name or registration number
    :return: the second element of search2's result, or None when the
             input is empty or search2 returns None
    """
    # Only a non-empty query is worth a round trip
    if companyName:
        outcome = search2(companyName)
        return None if outcome is None else outcome[1]
    return None


def search3(gate_method):
    """
    Re-run download and parsing from a previously saved gate_method,
    skipping the captcha step.

    :param gate_method: dict with a "data" entry (the raw search result)
                        and optionally "companyName"
    :return: tuple (raw_dict, asic_dict, gate_method) where asic_dict is
             None if parsing the downloaded pages failed and gate_method
             is a freshly built dict
    :raises Exception: when gate_method has no "data" key
    """
    if 'data' not in gate_method:
        raise Exception("gate_method error, doesn't have `data` key")
    com_list = gate_method.get('data')
    raw_dict = get_company_info(com_list)
    companyName = gate_method.get('companyName', '')

    company_name = companyName
    asic_dict = None
    try:
        parsed = extract_base_info(raw_dict)
        parsed['yearReportList'] = extract_year_info(raw_dict)
        # prefer the name reported by the registry over the stored one
        company_name = parsed['basicList'][0].get('enterpriseName', '') or companyName
        asic_dict = parsed
    except Exception as e:
        # parsing is best effort: the raw pages are still returned
        logger.info(e)

    raw_dict['companyName'] = company_name
    new_gate_method = {
        'url': 'http://www.jsgsj.gov.cn:58888/province/',
        'method': 'post',
        'province': 'js',
        'companyName': company_name,
        'data': com_list,
    }
    return raw_dict, asic_dict, new_gate_method


if __name__ == "__main__":
    # Manual smoke test against the live site. Replace the active name below
    # with any of the samples to exercise a particular page layout.
    #
    # General samples:
    #   '江苏赛德儿童教育管理有限公司'
    #   '江苏通用科技股份有限公司'
    #   '江苏三恒科技股份有限公司'
    #   '江苏飞驰股份有限公司'
    #   '江苏永鼎股份有限公司'
    # Complete annual report:
    #   '江苏克胜集团股份有限公司'
    # Paged personnel list, paged annual-report investments:
    #   '江苏弘业股份有限公司'
    #   '江苏飞船股份有限公司'
    # Paged shareholder (founder) list, paged annual-report investments:
    #   '江苏大为科技股份有限公司'
    # Spot checks / abnormal operation records:
    #   '新沂市鸿畅商贸有限公司'
    #   '宜兴市二十一世纪网络科技有限公司'
    #   '泰州市玉源通信建设有限公司'
    # Other enterprise types:
    #   '常州市正大铁路配件厂'
    #   '扬州市邗江万鑫印染辅料制衣厂'
    #   '连云港市喜乐烟花有限公司建设中路分公司'
    #   '宜兴市吴扣华紫砂艺术馆'
    #   '南京金金霞实业有限公司小太阳儿童用品商场'
    #   '金湖县供销社（集团）总公司'
    #   '丰县凤城同德商厦'
    #   '苏州青年旅行社股份有限公司华联营业部'
    #   '苏州新区枫桥兴达工艺刻字店'
    #   '宜兴市天健医药连锁有限公司张渚人民药店'
    #   '苏州国发泰民投资企业（有限合伙）'
    #   '南京卓讯科技有限公司'
    #   '南京华捷投资咨询有限责任公司'
    #   '江苏首佳房地产评估咨询事务所有限公司'
    #   '泰州市运通汽车商贸有限公司'
    #   '徐州双奇服饰贸易商行'
    #   '徐州市苏北惠民房地产评估有限公司'
    #   '常州市常富通信设备厂'
    #   '常州市天康医疗用品厂'
    #   '南京莘南机电有限公司'
    #   '中国电信股份有限公司丹阳全州营业所'
    companyName = '江苏苏宁易贷金融信息服务有限公司'

    result = search2(companyName)
    print(json.dumps(result, indent=4, ensure_ascii=False))

    # Stand-alone check of the captcha service with a saved image:
    # with open('/home/huangyu/下载/zdw.png') as fp:
    #     zdw_res = fp.read()
    # captcha = kill_captcha(zdw_res, 'zdw', 'png')
    # print captcha

# CAPTCHA_URL=http://192.168.31.43:44444/captcha
